In the website agent with type json, allow extract to be blank, in which case the entire JSON object will be stored as the payload.

Albert Sun 11 years ago
parent
commit
43194c3c1b
2 changed files with 71 additions and 34 deletions
  1. 46 34
      app/models/agents/website_agent.rb
  2. 25 0
      spec/models/agents/website_agent_spec.rb

+ 46 - 34
app/models/agents/website_agent.rb

@@ -61,7 +61,10 @@ module Agents
61 61
     end
62 62
 
63 63
     def validate_options
64
-      errors.add(:base, "url, expected_update_period_in_days, and extract are required") unless options[:expected_update_period_in_days].present? && options[:url].present? && options[:extract].present?
64
+      errors.add(:base, "url and expected_update_period_in_days are required") unless options[:expected_update_period_in_days].present? && options[:url].present?
65
+      if !options[:extract].present? && options[:type] != "json"
66
+        errors.add(:base, "extract is required for all types except json")
67
+      end
65 68
     end
66 69
 
67 70
     def check
@@ -74,45 +77,54 @@ module Agents
74 77
       request.on_success do |response|
75 78
         doc = parse(response.body)
76 79
         output = {}
77
-        options[:extract].each do |name, extraction_details|
78
-          result = if extraction_type == "json"
79
-                     output[name] = Utils.values_at(doc, extraction_details[:path])
80
-                   else
81
-                     output[name] = doc.css(extraction_details[:css]).map { |node|
82
-                       if extraction_details[:attr]
83
-                         node.attr(extraction_details[:attr])
84
-                       elsif extraction_details[:text]
85
-                         node.text()
86
-                       else
87
-                         error ":attr or :text is required on HTML or XML extraction patterns"
88
-                         return
89
-                       end
90
-                     }
91
-                   end
92
-          log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
93
-        end
94
-
95
-        num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
96
-
97
-        if num_unique_lengths.length != 1
98
-          error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
99
-          return
100
-        end
101
-
102 80
         previous_payloads = events.order("id desc").limit(UNIQUENESS_LOOK_BACK).pluck(:payload).map(&:to_json) if options[:mode].to_s == "on_change"
103
-        num_unique_lengths.first.times do |index|
104
-          result = {}
105
-          options[:extract].keys.each do |name|
106
-            result[name] = output[name][index]
107
-            if name.to_s == 'url'
108
-              result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
109
-            end
110
-          end
111 81
 
82
+        if extraction_type == "json" && !options[:extract].present?
83
+          result = doc
112 84
           if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
113 85
             log "Storing new result for '#{name}': #{result.inspect}"
114 86
             create_event :payload => result
115 87
           end
88
+        else
89
+          options[:extract].each do |name, extraction_details|
90
+            result = if extraction_type == "json"
91
+                       output[name] = Utils.values_at(doc, extraction_details[:path])
92
+                     else
93
+                       output[name] = doc.css(extraction_details[:css]).map { |node|
94
+                         if extraction_details[:attr]
95
+                           node.attr(extraction_details[:attr])
96
+                         elsif extraction_details[:text]
97
+                           node.text()
98
+                         else
99
+                           error ":attr or :text is required on HTML or XML extraction patterns"
100
+                           return
101
+                         end
102
+                       }
103
+                     end
104
+            log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
105
+          end
106
+
107
+          num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
108
+
109
+          if num_unique_lengths.length != 1
110
+            error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
111
+            return
112
+          end
113
+      
114
+          num_unique_lengths.first.times do |index|
115
+            result = {}
116
+            options[:extract].keys.each do |name|
117
+              result[name] = output[name][index]
118
+              if name.to_s == 'url'
119
+                result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
120
+              end
121
+            end
122
+
123
+            if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
124
+              log "Storing new result for '#{name}': #{result.inspect}"
125
+              create_event :payload => result
126
+            end
127
+          end
116 128
         end
117 129
       end
118 130
       hydra.queue request

+ 25 - 0
spec/models/agents/website_agent_spec.rb

@@ -155,6 +155,31 @@ describe Agents::WebsiteAgent do
155 155
         event.payload[:version].should == 2
156 156
         event.payload[:title].should == "first"
157 157
       end
158
+
159
+      it "stores the whole object if :extract is not specified" do
160
+        json = {
161
+            :response => {
162
+                :version => 2,
163
+                :title => "hello!"
164
+            }
165
+        }
166
+        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
167
+        site = {
168
+            :name => "Some JSON Response",
169
+            :expected_update_period_in_days => 2,
170
+            :type => "json",
171
+            :url => "http://json-site.com",
172
+            :mode => :on_change
173
+        }
174
+        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
175
+        checker.user = users(:bob)
176
+        checker.save!
177
+
178
+        checker.check
179
+        event = Event.last
180
+        event.payload[:response][:version].should == 2
181
+        event.payload[:response][:title].should == "hello!"
182
+      end
158 183
     end
159 184
   end
160 185
 end